Theme Song
FRA : Elle me dit
CHN : 她对我说
ENG : She told me
JPN : 彼女は私に言う
FRA : écris une chanson contente
CHN : 写一首欢快的歌
ENG : Write a happy song
JPN : 幸せな歌を書く
FRA : Pas une chanson déprimante
CHN : 而不是悲伤的歌
ENG : Not a depressing song
JPN : 気のめいるような歌ではない
FRA : Une chanson que tout le monde aime
CHN : 一首让所有人都喜欢的歌
ENG : A song that everyone loves
JPN : みんなが大好きな曲
FRA : Elle me dit
CHN : 她对我说
ENG : She told me
JPN : 彼女は私に言う
FRA : Tu deviendras milliardaire
CHN : 你将成为亿万富翁
ENG : You will become a millionaire
JPN : あなたは億万長者になります
FRA : Tu auras de quoi être fier
CHN : 你将为此感到骄傲
ENG : You will be proud
JPN : あなたは誇りに思うでしょう
Following the issues raised in Deriv.com - Interday High Frequency Trading Models Comparison Blooper, here I review the research using the same dataset, as described in Part I below.
In this paper I am using SARIMA models.
Load Packages
## Install BBmisc on first run; INSTALL_opts = '--no-lock' avoids the
## Windows file-lock error during installation.
if (!suppressPackageStartupMessages(require('BBmisc'))) {
  install.packages('BBmisc', dependencies = TRUE, INSTALL_opts = '--no-lock')
}
suppressPackageStartupMessages(require('BBmisc'))
# suppressPackageStartupMessages(require('rmsfuns'))

## Packages used throughout this paper. Duplicates removed from the
## original list ('devtools', 'stringr' and 'htmltools' each appeared twice).
pkgs <- c('devtools', 'knitr', 'kableExtra', 'tint', 'furrr', 'tidyr',
          'readr', 'lubridate', 'data.table', 'reprex',
          'feather', 'purrr', 'quantmod', 'tidyquant', 'tibbletime',
          'timetk', 'plyr', 'dplyr', 'stringr', 'magrittr', 'tdplyr',
          'tidyverse', 'memoise', 'htmltools', 'formattable', 'dtplyr',
          'zoo', 'forecast', 'seasonal', 'seasonalview', 'rjson',
          'rugarch', 'rmgarch', 'mfGARCH', 'sparklyr', 'jcolors',
          'microbenchmark', 'dendextend', 'lhmetools', 'ggthemr',
          'pacman', 'profmem', 'ggthemes', 'flyingfox',
          'echarts4r', 'viridis', 'hrbrthemes',
          'fable', 'fabletools', 'Rfast', 'Metrics', 'MLmetrics')

## BBmisc::lib() loads (and installs when missing) every package in pkgs.
suppressAll(lib(pkgs))
# load_pkg(pkgs)

## Root of the data repository used by the progress and model functions.
.dtr <- 'C:/Users/User/Documents/GitHub/binary.com-interview-question-data/'

## Set the timezone but not change the datetime.
Sys.setenv(TZ = 'Asia/Tokyo')

## options(knitr.table.format = 'html') will set all kableExtra tables to be
## 'html', otherwise need to set the parameter on every single table.
## warn = -1 silences warnings globally (deliberate for the rendered paper).
options(warn = -1, knitr.table.format = 'html')#, digits.secs = 6)

## https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-abnor-in-rmd-but-not-in-r-script
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
                      message = FALSE, cache.lazy = FALSE)
rm(pkgs)
Progress Function
task_progress <- function(mbase, timeID0 = NULL, scs = 60, .pattern = '^sarima_', .loops = TRUE) {
  ## Periodically report simulated-forecast progress by counting the model
  ## files already saved under <.dtr>/data/fx/USDJPY/ (one .rds per model
  ## per forecast date).
  ##
  ## mbase   : data set; only the `index` datetime column is read here.
  ## timeID0 : optional start date; progress is measured from this date on.
  ## scs     : seconds to sleep between two polls when .loops = TRUE.
  ## .pattern: filename pattern of the saved model files.
  ## .loops  : TRUE  = poll every `scs` seconds until progress reaches 100%;
  ##           FALSE = print a single progress report and return.
  require('magrittr')
  require('tibble')

  ## Bug fix: the original tested is.data.frame(class(mbase)) — the class
  ## *string*, which is never a data.frame — so the coercion always ran.
  ## Test the object itself instead.
  if (!is.data.frame(mbase)) {
    mbase %<>% data.frame
  }

  ## One polling pass: prints the per-model progress table and returns the
  ## overall completion ratio. `filter_saturday` drops the weekly last
  ## price stamped 12:00am on Saturday (only applied in looping mode, to
  ## mirror the original behaviour).
  poll_once <- function(filter_saturday) {
    cat('Current Tokyo Time :', as.character(now('Asia/Tokyo')), '\n\n')
    y = as_date(mbase$index) %>%
      unique
    if (filter_saturday) {
      ## filter and omit the weekly last price, which is 12:00am on Saturday
      y <- y[weekdays(y) != 'Saturday']
    }
    datee = y
    if (is.null(timeID0)) {
      timeID0 = y[1]
    } else if (is.Date(timeID0)) {
      timeID0 = as_date(timeID0)
    } else {
      timeID0 = as_date(mbase$index) %>%
        unique
    }
    y = y[y >= timeID0]
    ## Parse "<model>.<date>.rds" filenames into a Model/Date table.
    x = list.files(paste0(.dtr, 'data/fx/USDJPY/'), pattern = .pattern) %>%
      str_replace_all('.rds', '') %>%
      str_replace_all('.201', '_201') %>%
      str_split_fixed('_', '2') %>%
      as_tibble %>%
      dplyr::rename('Model' = 'V1', 'Date' = 'V2') %>%
      dplyr::mutate(Model = factor(Model), Date = as_date(Date))
    x = join(tibble(Date = datee), x) %>%
      as_tibble
    x %<>% na.omit
    x %<>% dplyr::mutate(binary = if_else(is.na(Model), 0, 1)) %>%
      spread(Model, binary)
    ## Count saved files per model and express them as a percentage of the
    ## expected number of forecast dates.
    z <- ldply(x[, -1], function(zz) {
      na.omit(zz) %>% length }) %>%
      dplyr::rename(x = V1) %>%
      dplyr::mutate(n = length(y), progress = percent(x/n))
    print(z)
    prg = sum(z$x)/sum(z$n)
    cat('\n================', as.character(percent(prg)), '================\n\n')
    prg
  }

  if (.loops == TRUE) {
    while (1) {
      prg <- poll_once(filter_saturday = TRUE)
      if (prg == 1) break   # stop updating once progress reaches 100%
      Sys.sleep(scs)        # ldply() above takes 3~5s; then rest `scs` seconds
    }
  } else {
    poll_once(filter_saturday = FALSE)
  }
  invisible(NULL)
}
## Fall back to the default data path when `dtr` has not been set.
## NOTE(review): the setup chunk defines `.dtr` while this chunk uses `dtr` —
## two distinct variables that happen to share the same value; kept as-is
## because later chunks reference `dtr`.
if (!exists('dtr')) {
  dtr <- 'C:/Users/User/Documents/GitHub/binary.com-interview-question-data/'
}

## Save the sample data set once, if it is in memory but not yet on disk.
## Scalar condition, so use the short-circuiting `&&` rather than the
## vectorized `&` used originally.
if (!file.exists(paste0(dtr, 'data/fx/USDJPY/dsmp.rds')) && exists('dsmp')) {
  saveRDS(dsmp, paste0(dtr, 'data/fx/USDJPY/dsmp.rds'))
}

## Read the sample data set when it is not in memory yet.
if (!exists('dsmp')) {
  dsmp <- readRDS(paste0(dtr, 'data/fx/USDJPY/dsmp.rds'))
}
## plot sample data
dsmp[c(1:3, (nrow(dsmp)-3):nrow(dsmp)),] %>%
kbl(caption = '1 min Close Price Dataset', escape = FALSE) %>%
## https://www.w3schools.com/cssref/css_colors.asp
## https://public.tableau.com/en-us/gallery/100-color-palettes?gallery=votd
row_spec(0, background = 'DimGrey', color = 'gold', bold = TRUE) %>%
column_spec(1, background = 'CornflowerBlue') %>%
column_spec(2, background = 'Gray') %>%
column_spec(3, background = 'DarkGrey') %>%
column_spec(4, background = 'Gray') %>%
column_spec(5, background = 'DarkGrey') %>%
column_spec(6, background = '#4897D8') %>%
column_spec(7, background = '#556DAC') %>%
column_spec(8, background = '#92AAC7') %>%
column_spec(9, background = '#556DAC') %>%
column_spec(10, background = '#375E97') %>%
column_spec(11, background = 'CornflowerBlue') %>%
column_spec(12, background = 'LightGray', color = 'goldenrod') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
kable_material(full_width = FALSE) %>% ##`full_width = FALSE` will auto adjust every single columns width to fit the table full width.
scroll_box(width = '100%', fixed_thead = TRUE, height = '400px')
| index | year | quarter | month | week | wkdays | wk_1m | dy_1m | hr_1m | sq | date | close |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2015-01-05 00:01:00 | 2015 | 1 | 1 | 1 | Monday | 1 | 1 | 1 | 1 | 2015-01-05 | 120.5740 |
| 2015-01-05 00:02:00 | 2015 | 1 | 1 | 1 | Monday | 2 | 2 | 2 | 2 | 2015-01-05 | 120.5900 |
| 2015-01-05 00:03:00 | 2015 | 1 | 1 | 1 | Monday | 3 | 3 | 3 | 3 | 2015-01-05 | 120.6035 |
| 2018-07-06 23:57:00 | 2018 | 3 | 7 | 27 | Friday | 7197 | 1437 | 57 | 1324797 | 2018-07-06 | 110.4635 |
| 2018-07-06 23:58:00 | 2018 | 3 | 7 | 27 | Friday | 7198 | 1438 | 58 | 1324798 | 2018-07-06 | 110.4740 |
| 2018-07-06 23:59:00 | 2018 | 3 | 7 | 27 | Friday | 7199 | 1439 | 59 | 1324799 | 2018-07-06 | 110.4740 |
| 2018-07-07 00:00:00 | 2018 | 3 | 7 | 27 | Saturday | 7200 | 1440 | 60 | 1324800 | 2018-07-07 | 110.4740 |
source : 1324800 x 12
From above table, we can know the dataset gather from 2015-01-05 to 2018-07-07 which is used in Deriv.com - Interday High Frequency Trading Models Comparison Review (Part I).
Here I start my 1st forecast date from 1st trading datetime of 2016 (2016-01-04 which is 2nd year in dataset) which is used in Deriv.com - Interday High Frequency Trading Models Comparison Review (Part I).
ts() & auto.arima()
I set the length of the data as weekly (7200 minutes, which is 5 trading days) to forecast 1440 minutes (one trading day).
Here I only use 7200 to forecast 1440 as we can know that’s the optimal volume of observation data as we know from Deriv.com - Interday High Frequency Trading Models Comparison Review (Part I).
I set the length of the data as weekly (5 days * 1440 mins = 7200 mins, which is 5 trading days) to forecast 1440 minutes (one trading day).
auto.arima() from forecast package

sarima <- function(timeID, data = dsmp, data_len, hrz1 = 1440,
                   .model = c('auto', 'Arima'), .d = NA, .D = NA,
                   .order = c(0, 0, 0), .seasonal = c(0, 0, 0)) {
  ## Fit a (seasonal) ARIMA on a rolling training window and save the
  ## one-day-ahead forecasts, one .rds file per forecast date.
  ##
  ## timeID   : vector of forecast dates.
  ## data     : data.table with columns index/date/sq/close. Defaults to the
  ##            global `dsmp`; the original body ignored this argument and
  ##            always used `dsmp` directly — fixed here (backward
  ##            compatible, since the default is `dsmp`).
  ## data_len : number of trailing 1-min observations to train on.
  ## hrz1     : forecast horizon in minutes (one trading day = 1440).
  ## .model   : 'auto' = auto.arima(); 'Arima' = Arima() with .order/.seasonal.
  ## .d, .D   : differencing orders passed to auto.arima().
  ## .order, .seasonal : orders passed to Arima().
  .model <- match.arg(.model)

  ## Fit on `train`, forecast hrz1 steps ahead, print and save the result.
  fit_and_save <- function(i, train) {
    ctr <- train$sq[1]:(range(train$sq)[2] + hrz1)
    cat('\n')
    cat('-------------------------------------------\n')
    cat('train_test[', i, ']\n')
    print(train_test <- data[sq %in% ctr])
    srm <- train[, .(index, close)] %>%
      tk_ts(frequency = hrz1)
    if (.model == 'auto') {
      srm %<>% auto.arima(d = .d, D = .D)
    } else if (.model == 'Arima') {
      srm %<>% Arima(order = .order, seasonal = .seasonal)
    }
    ## Attach the realised market price alongside the forecast price.
    srm %<>%
      forecast(h = hrz1) %>%
      tk_tbl %>%
      dplyr::mutate(index = train_test[(.N - hrz1 + 1):.N,]$index,
                    mk.price = train_test[(.N - hrz1 + 1):.N,]$close) %>%
      dplyr::rename(fc.price = `Point Forecast`) %>%
      dplyr::select(index, mk.price, fc.price)
    cat('\n')
    cat('-------------------------------------------\n')
    cat('forecast[', i, ']\n')
    print(srm %>% as.data.table)
    saveRDS(srm, paste0(
      dtr, 'data/fx/USDJPY/sarima_', .model, '_', data_len,
      '_', hrz1, '.', as_date(srm$index[1]), '.rds'))
    cat('\n')
    cat(i, '=', paste0('~/data/fx/USDJPY/sarima_', .model, '_',
                       data_len, '_', hrz1, '.',
                       as_date(srm$index[1]), '.rds saved!\n'))
    cat('\n\n')
    rm(srm)  # the original i == 1 branch had rm(sets): a typo, fixed here
  }

  tmp <- llply(seq_along(timeID), function(i) {
    if (i == 1) {
      ## First window: the last data_len observations strictly before the
      ## first forecast date.
      cat('\n')
      cat('===========================================\n')
      cat('train[', i, ']\n')
      print(train <- data[date < timeID[i]][(.N - (data_len - 1)):.N])
      fit_and_save(i, train)
    } else if (i %in% seq(1, length(timeID), by = 6)[-1]) {
      ## Every 6th date (except the 1st) is deliberately skipped,
      ## as in the original implementation.
    } else if (i == length(timeID)) {
      ## The final date is skipped as well, as in the original.
    } else {
      ## Rolling window ending at the first observation of timeID[i].
      lst_sq <- data[date < timeID[i],][.N]$sq + 1
      cat('\n')
      cat('===========================================\n')
      cat('train[', i, ']\n')
      print(train <- data[(lst_sq - data_len + 1):lst_sq])
      fit_and_save(i, train)
    }
  })
  return(tmp)
}
source('function/sarima.R')

# --------- eval=FALSE ---------
## Forecast every trading date from the first trading date of 2016 onward.
timeID <- unique(dsmp$date)
bse <- dsmp[year == 2016]$date[1] #"2016-01-04" #1st trading date in 2nd year
timeID %<>% .[. >= bse]
#timeID %<>% .[. >= as_date('2016-01-04')]

data_len <- 7200 #last 7200 observations dsmp[(.N - (data_len - 1)):.N]
hrz1 <- 1440

## Model settings. The original assigned `.model` twice (exact duplicate
## line) — removed.
.model = c('auto', 'Arima')
.d = NA
.D = NA
.order = c(0, 0, 0)
.seasonal = c(0, 0, 0)

## Run both model flavours. Pass the settings declared above instead of
## re-hard-coding c(0, 0, 0) inside the call, so changing them takes effect.
llply(.model, function(md) {
  sarima(timeID = timeID, dsmp,
         data_len = data_len, hrz1 = hrz1,
         .model = md,
         .d = .d, .D = .D,
         .order = .order,
         .seasonal = .seasonal)
})
msts() & auto.arima()
fable arfima()
msts() & hw()
After looking a bit more, I came across this question where a user wanted to use the hw method to forecast half-hourly electricity demand using the taylor dataset available in the forecast package.
As Professor Rob Hyndman suggest in the response to the linked question, the double seasonal Holt Winters model method dshw from the forecast package can be used to deal with half-hourly data.
After removing the yearly seasonality parameter (seasonal.periods = 8760) in the definition of my msts object, I’ve ran the model and it provided a pretty accurate result.
How can I apply Seasonal Exponential Smoothing forecasting method to hourly data in R
msts() & tbats()
I set the length of the data as weekly (7200 minutes, which is 5 trading days) to forecast 1440 minutes (one trading day).
Chapter 7 Multivariate TS Analysis in Introduction to Time Series Analysis and Forecasting in R, but I use univariate due to some errors as mentioned in beginning.
I tried to use weeks(1), months(1), months(3), years(1) but there is not constant observations, we can refer to The seasonal period.
Here I filter up the data as below :
5 days * 1440 mins = 7200 mins = weekly; 22 days * 1440 mins = 31680 mins = monthly; 3 months * 22 days * 1440 mins = 95040 mins = quarterly; 52 weeks * 5 days * 1440 mins = 374400 mins = yearly. I set the length of the data as monthly (22 days * 1440 mins = 31680 mins, which is 22 trading days), with nested seasonal periods of weekly (5 days * 1440 mins = 7200 mins, which is 5 trading days) and daily (1440 minutes), to forecast 1440 minutes (one trading day).
22 days * 1440 mins = 31680 mins5 days * 1440 mins = 7200 mins1440 mins1440 mins# --------- eval=FALSE ---------
## Forecast dates: every unique trading date from the first 2016 date on.
bse <- dsmp[year == 2016]$date[1] #"2016-01-04" #1st trading date in 2nd year
timeID <- unique(dsmp$date)
timeID <- timeID[timeID >= bse]
#timeID %<>% .[. >= as_date('2016-01-04')]

data_len <- 7200 #last 7200 observations dsmp[(.N - (data_len - 1)):.N]
hrz1 <- 1440
hrz2 <- 1440

## NOTE(review): `ets.m` and `mstseas()` are defined elsewhere (not visible
## in this chunk); presumably ets.m lists the ETS model flavours — confirm.
llply(ets.m, function(md) {
  mstseas(timeID = timeID, dsmp, data_len = data_len,
          hrz1 = hrz1, hrz2 = hrz2, .model = md)
})
I set the length of data as quarterly (3 months * 22 days * 1440 mins = 95040 mins minutes which is 66 trading days), nested seasonal weekly (5 days * 1440 mins = 7200 mins minutes which is 5 trading days) and 1440 minutes (1440 minutes is a trading day) to forecast 1440 minutes (1440 minutes is a trading day).
3 months * 22 days * 1440 mins = 95040 mins5 days * 1440 mins = 7200 mins1440 mins1440 minsI set the length of data as yearly (52 weeks * 5 days * 1440 mins = 374400 mins minutes which is 260 trading days), nested seasonal weekly (5 days * 1440 mins = 7200 mins minutes which is 5 trading days) and 1440 minutes (1440 minutes is a trading day) to forecast 1440 minutes (1440 minutes is a trading day).
52 weeks * 5 days * 1440 mins = 374400 mins5 days * 1440 mins = 7200 mins1440 mins1440 minsI set the length of data as quarterly (3 months * 22 days * 1440 mins = 95040 mins minutes which is 66 trading days), nested seasonal monthly (22 days * 1440 mins = 31680 mins minutes which is 22 trading days) and 1440 minutes (1440 minutes is a trading day) to forecast 1440 minutes (1440 minutes is a trading day).
3 months * 22 days * 1440 mins = 95040 mins22 days * 1440 mins = 31680 mins1440 mins1440 minsHere I read the saved models.
Due to the models only forecast 1440 mins (but not 7200 mins) in advance, here I no need to filter the forecast price.
From the final-stage models comparison, we know that ******* is the best model.
Next papers will compare tbats, midas, sarimax etc.
It’s useful to record some information about how your file was created.
## Collect platform info (left columns) and system info (right columns),
## append the current Tokyo time to whichever side is one row short so the
## two data frames can be cbind()-ed, then render as a styled table.
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('magrittr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))

sys1 <- devtools::session_info()$platform %>%
  unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL

sys2 <- data.frame(Sys.info()) %>%
  dplyr::mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- 'Sys.info'
rownames(sys2) <- NULL

## Balance the two halves to the same row count before cbind().
## Scalar condition: use the short-circuiting `&&`, not the vectorized `&`.
if (nrow(sys1) == 9 && nrow(sys2) == 8) {
  sys2 %<>% rbind(., data.frame(
    Category = 'Current time',
    Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
} else {
  sys1 %<>% rbind(., data.frame(
    Category = 'Current time',
    session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
}

sys <- cbind(sys1, sys2) %>%
  kbl(caption = 'Additional session information:') %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
  row_spec(0, background = 'DimGrey', color = 'yellow') %>%
  column_spec(1, background = 'CornflowerBlue', color = 'red') %>%
  column_spec(2, background = 'grey', color = 'black') %>%
  column_spec(3, background = 'CornflowerBlue', color = 'blue') %>%
  column_spec(4, background = 'grey', color = 'white') %>%
  row_spec(9, bold = TRUE, color = 'yellow', background = '#D7261E')
rm(sys1, sys2)
sys
| Category | session_info | Category | Sys.info |
|---|---|---|---|
| version | R version 4.0.3 (2020-10-10) | sysname | Windows |
| os | Windows 10 x64 | release | 10 x64 |
| system | x86_64, mingw32 | version | build 19042 |
| ui | RTerm | nodename | SCIBROKES-TRADI |
| language | en | machine | x86-64 |
| collate | English_World.1252 | login | Owner |
| ctype | English_World.1252 | user | Owner |
| tz | Asia/Tokyo | effective_user | Owner |
| date | 2021-02-03 | Current time | 2021-02-03 04:32:31 JST<U+0001F5FE> |
ARCH in Mean
ARIMA(p,d,q) parameter optimization
Powered by - Copyright® Intellectual Property Rights of Sςιβrοκεrs Trαdιηg ®️ 経営企業